

set more off, permanent

******************************************************************************
****This Do File Produces the Regression Results Reported in the Paper********
******************************************************************************

qui use "${hmrc_datadir}\export_reg_final_ukmncs_fy", clear

xtset id year

*post-reform variables
gen post=year>=2009
gen r_lowwedge_post=r_lowwedge*post
gen r_highwedge_post=r_highwedge*post
gen r_post=related*post
*controls
gen gdppc_usd=gdppc/exchange_rate
sum gdppc_usd
gen lgdppc=ln(gdppc_usd)
gen related_lnGDP=related*lgdppc

global outcome lnp_weight

estimates clear


*******Table 3: Effect of the Tax Differentials on Transfer Pricing by UK MNCs: Baseline Results****

*column 1: three-way linear regression
eststo: reghdfe ${outcome} r_lowwedge r_highwedge ///
, absorb(i.identifier#i.comcode#i.ctry ///
i.year#i.identifier#i.comcode i.year#i.ctry#i.comcode) resid(lnp_resid) vce(cluster ctry_year)
  
*column 2: controlling for the intereaction bewteen related and GDP in destination country

eststo: reghdfe ${outcome} r_lowwedge r_highwedge related_lnGDP ///
, absorb(i.identifier#i.comcode#i.ctry ///
i.year#i.identifier#i.comcode i.year#i.ctry#i.comcode) resid(lnp_resid2) vce(cluster ctry_year)


***Dynamic Ownership***

use "${hmrc_datadir}\export_reg_final_ukmncs_fy_dynamicownership", clear

count if related2!=related
gen drop=related2!=related
bysort identifier: egen drop_sample=max(drop)
tab drop_sample

xtset id year

*post-reform variables
gen post=year>=2009
gen r_lowwedge_post=r_lowwedge*post
gen r_highwedge_post=r_highwedge*post
gen r_post=related*post

gen r2_lowwedge=related2*I_low_wedge
gen r2_highwedge=related2*I_high_wedge

gen r2_lowwedge_post=related2*I_low_wedge*post
gen r2_highwedge_post=related2*I_high_wedge*post
gen r2_post=related2*post
gen related2_lnGDP=related2*lgdppc


*controls
gen gdppc_usd=gdppc/exchange_rate
gen lgdppc=ln(gdppc_usd)
gen related_lnGDP=related*lgdppc

global outcome lnp_weight

*column 3: dropping firms with changed ownership structure:

eststo: reghdfe ${outcome} r_lowwedge r_highwedge related_lnGDP if drop_sample==0 ///
, absorb(i.identifier#i.comcode#i.ctry ///
i.year#i.identifier#i.comcode i.year#i.ctry#i.comcode) vce(cluster ctry_year)
  

*column 4: using the updated ownership structure for all firms:

eststo: reghdfe ${outcome} r2_lowwedge r2_highwedge related2_lnGDP  ///
, absorb(i.identifier#i.comcode#i.ctry ///
i.year#i.identifier#i.comcode i.year#i.ctry#i.comcode) vce(cluster ctry_year)
  
qui estout est* using "${hmrc_resultdir}\regression_table3.xls", ///
cells(b(star fmt(%9.3f)) se(par(`"("'`")"'))) ///
starlevels(* 0.10 ** 0.05 *** 0.01) stats(r2 r2_a N , ///
fmt(%9.3f %9.0g) labels(R-squared))  label replace 

estimates clear

*******Table 4: Effect of the Tax Differentials on Transfer Pricing by UK MNCs: Tax Reform****

qui use "${hmrc_datadir}\export_reg_final_ukmncs_fy", clear

xtset id year

*post-reform variables
gen post=year>=2009
gen r_lowwedge_post=r_lowwedge*post
gen r_highwedge_post=r_highwedge*post
gen r_post=related*post
*controls
gen gdppc_usd=gdppc/exchange_rate
sum gdppc_usd
gen lgdppc=ln(gdppc_usd)
gen related_lnGDP=related*lgdppc

global outcome lnp_weight

estimates clear

*column 1: quasi-natural experiment
eststo: reghdfe ${outcome} r_lowwedge r_highwedge r_post r_lowwedge_post r_highwedge_post if year!=2009, absorb(i.identifier#i.comcode#i.ctry ///
i.year#i.identifier#i.comcode i.year#i.ctry#i.comcode) resid(lnp_resid) vce(cluster ctry_year)

*column 2: quasi-natural experiment with pricing-to-market data
eststo: reghdfe ${outcome} r_lowwedge r_highwedge r_post r_lowwedge_post r_highwedge_post related_lnGDP if year!=2009, absorb(i.identifier#i.comcode#i.ctry ///
i.year#i.identifier#i.comcode i.year#i.ctry#i.comcode) resid(lnp_resid2) vce(cluster ctry_year)

*column 3: three-way linear regression using observations in column 2
eststo: reghdfe ${outcome} r_lowwedge r_highwedge if lnp_resid!=. ///
, absorb(i.identifier#i.comcode#i.ctry ///
i.year#i.identifier#i.comcode i.year#i.ctry#i.comcode) vce(cluster ctry_year)

eststo: reghdfe ${outcome} r_lowwedge r_highwedge related_lnGDP if lnp_resid2!=. ///
, absorb(i.identifier#i.comcode#i.ctry ///
i.year#i.identifier#i.comcode i.year#i.ctry#i.comcode) vce(cluster ctry_year)

drop lnp_resid lnp_resid2

qui estout est* using "${hmrc_resultdir}\regression_table4.xls", ///
cells(b(star fmt(%9.3f)) se(par(`"("'`")"'))) ///
starlevels(* 0.10 ** 0.05 *** 0.01) stats(r2 r2_a N , ///
fmt(%9.3f %9.0g) labels(R-squared))  label replace 


*******Table 5: Effect of the Tax Differentials on Transfer Pricing by UK MNCs: Placebo Tests****

qui use "${hmrc_datadir}\export_reg_final_ukmncs_fy", clear

xtset id year

*post-reform variables
gen post=year>=2009
gen r_lowwedge_post=r_lowwedge*post
gen r_highwedge_post=r_highwedge*post
gen r_post=related*post
*controls

global outcome lnp_weight

estimates clear
cap drop post2009

gen pre_reform=year<=2009
forv i=2005/2011 {
gen year`i'=year==`i'
gen post`i'=year>=`i'
gen r_lowwedge_post`i'=r_lowwedge*post`i'
gen r_highwedge_post`i'=r_highwedge*post`i'
gen r_post`i'=related*post`i'
gen r_year`i'=related*year`i'
gen r_lowwedge_year`i'=r_lowwedge*year`i'
gen r_highwedge_year`i'=r_highwedge*year`i'
}

forv i=2006/2008 {

eststo: reghdfe ${outcome} r_lowwedge r_highwedge r_post`i' r_lowwedge_post`i' r_highwedge_post`i' if pre_reform==1, absorb(i.identifier#i.comcode#i.ctry ///
i.year#i.identifier#i.comcode i.year#i.ctry#i.comcode) vce(cluster ctry_year)

qui estout est* using "${hmrc_resultdir}\regression_table5.xls", ///
cells(b(star fmt(%9.3f)) se(par(`"("'`")"'))) ///
starlevels(* 0.10 ** 0.05 *** 0.01) stats(r2 N , ///
fmt(%9.3f %9.0g) labels(R-squared))  label replace 
}

estimates clear

*******Table 6: Effect of the Tax Differentials on Trade Diversion by UK MNCs*********


qui use "${hmrc_datadir}\export_reg_final_ukmncs_fy_ntrans", clear
xtset id year
cap drop freq_trans freq_trans2 *_freq *_freq2 

*post-reform variables
gen post=year>=2009
gen r_lowwedge_post=r_lowwedge*post
gen r_highwedge_post=r_highwedge*post
gen r_post=related*post

gen gdppc_usd=gdppc/exchange_rate
gen related_lnGDP=related*lgdppc

*trading weight
gen ln_notrans=ln(no_trans)
*trading mass
gen ln_weight=ln(netmass)
*trading volume
gen ln_svalue=ln(svalue)

estimates clear

foreach x in ln_weight lnp_weight ln_svalue {
global outcome `x'

******Linear Specification*******
eststo: reghdfe ${outcome} r_lowwedge r_highwedge related_lnGDP ///
, absorb(i.identifier#i.comcode#i.ctry i.year#i.identifier#i.comcode i.year#i.ctry#i.comcode)  vce(cluster ctry_year)

eststo: reghdfe ${outcome} r_lowwedge r_highwedge r_post r_lowwedge_post r_highwedge_post related_lnGDP if year!=2009 ///
, absorb(i.identifier#i.comcode#i.ctry i.year#i.identifier#i.comcode i.year#i.ctry#i.comcode)  vce(cluster ctry_year)

}


qui estout est* using "${hmrc_resultdir}\regression_table6.xls", ///
cells(b(star fmt(%9.3f)) se(par(`"("'`")"'))) ///
starlevels(* 0.10 ** 0.05 *** 0.01) stats(r2 r2_a N , ///
fmt(%9.3f %9.0g) labels(R-squared)) label replace 

estimates clear


*********Table 7: Comparison with Existing Studies*********************

qui use "${hmrc_datadir}\export_reg_final_ukmncs_fy", clear

xtset id year
global outcome lnp_weight

gen ln_exchange=ln(exchange_rate)
gen ln_gdp=ln(gdp)
gen ln_gdpc=ln(gdppc)
gen ln_dist=ln(dist)
gen ln_sale=ln(turnover)
gen ln_wrks=ln(no_employees)
gen ln_ppl=ln(Population)

gen gdppc_usd=gdppc/exchange_rate
gen lgdppc=ln(gdppc_usd)
gen related_lnGDP=related*lgdppc
gen related_dist=related*ln_dist

gen tax_diff=tax-uk_tax
gen oneminustau=1-tax/100
gen log_oneminustau=log(oneminustau)
gen r_log_tau=related*log_oneminustau
gen r_taxdiff=related*tax_diff
gen related_haven=related*tax_haven

estimates clear

*Column 1: No FE at all
eststo: xtreg ${outcome} I_low_wedge I_high_wedge r_lowwedge r_highwedge ///
, vce(robust)	

*Column 2-5: Davies at all
eststo: reghdfe ${outcome} I_low_wedge I_high_wedge r_lowwedge r_highwedge if year==2007 ///
, absorb(i.comcode#i.identifier#related) vce(cluster ctry)

eststo: reghdfe ${outcome} I_low_wedge I_high_wedge r_lowwedge r_highwedge if year==2007 ///
, absorb(i.comcode#i.identifier#related i.ctry) vce(cluster ctry)

eststo: reghdfe ${outcome} log_oneminustau r_log_tau if year==2007 ///
, absorb(i.comcode#i.identifier#related) vce(cluster ctry)

eststo: reghdfe ${outcome} r_log_tau related_haven related_lnGDP related_dist if year==2007 ///
, absorb(i.comcode#i.identifier#related i.ctry) vce(cluster ctry)

*Columns 6-7: Vicard (2015)
eststo: reghdfe ${outcome} I_low_wedge I_high_wedge r_lowwedge r_highwedge ///
, absorb(i.comcode#i.identifier#i.year i.ctry#i.comcode#i.year) vce(cluster ctry_year)

eststo: reghdfe ${outcome} r_taxdiff ///
, absorb(i.comcode#i.identifier#i.year i.ctry#i.comcode#i.year) vce(cluster ctry_year)

*Columns 8-9: Cristen (2015)
eststo: reghdfe ${outcome} I_low_wedge I_high_wedge r_lowwedge r_highwedge ///
, absorb(i.ctry#i.identifier#i.comcode i.low_tax_country#i.year year) ///
vce(cluster ctry_year)

eststo: reghdfe ${outcome} I_low_wedge I_high_wedge r_lowwedge r_highwedge ///
ln_sale ln_wrks ln_ppl ln_gdpc ln_exchange, absorb(year i.ctry#i.identifier#i.comcode i.low_tax_country#i.year) ///
vce(cluster ctry_year)

qui estout est* using "${hmrc_resultdir}\regression_table7.xls", ///
cells(b(star fmt(%9.3f)) se(par(`"("'`")"'))) ///
starlevels(* 0.10 ** 0.05 *** 0.01) stats(r2 r2_o r2_a N , ///
fmt(%9.3f %9.0g) labels(R-squared)) label replace

est clear
*CBT (2016)
reghdfe ${outcome} I_low_wedge I_high_wedge r_lowwedge r_highwedge ///
, absorb(i.identifier#i.comcode#i.ctry i.year#i.identifier#i.comcode ///
i.year#i.ctry#i.comcode) vce(cluster ctry_year)  residuals(resid)

gen in_cbtsample=resid!=.

*check re sample difference
keep if in_cbtsample==1

*Column 1: No FE at all
eststo: xtreg ${outcome} I_low_wedge I_high_wedge r_lowwedge r_highwedge ///
, vce(robust)	

*Column 2-5: Davies at all
eststo: reghdfe ${outcome} I_low_wedge I_high_wedge r_lowwedge r_highwedge if year==2007 ///
, absorb(i.comcode#i.identifier#related) vce(cluster ctry)

eststo: reghdfe ${outcome} I_low_wedge I_high_wedge r_lowwedge r_highwedge if year==2007 ///
, absorb(i.comcode#i.identifier#related i.ctry) vce(cluster ctry)

eststo: reghdfe ${outcome} log_oneminustau r_log_tau if year==2007 ///
, absorb(i.comcode#i.identifier#related) vce(cluster ctry)

eststo: reghdfe ${outcome} r_log_tau related_haven related_lnGDP related_dist if year==2007 ///
, absorb(i.comcode#i.identifier#related i.ctry) vce(cluster ctry)

*Columns 6-7: Vicard (2015)
eststo: reghdfe ${outcome} I_low_wedge I_high_wedge r_lowwedge r_highwedge ///
, absorb(i.comcode#i.identifier#i.year i.ctry#i.comcode#i.year) vce(cluster ctry_year)

eststo: reghdfe ${outcome} r_taxdiff ///
, absorb(i.comcode#i.identifier#i.year i.ctry#i.comcode#i.year) vce(cluster ctry_year)

*Columns 8-9: Cristen (2015)
eststo: reghdfe ${outcome} I_low_wedge I_high_wedge r_lowwedge r_highwedge ///
, absorb(i.ctry#i.identifier#i.comcode i.low_tax_country#i.year year) ///
vce(cluster ctry_year)

eststo: reghdfe ${outcome} I_low_wedge I_high_wedge r_lowwedge r_highwedge ///
ln_sale ln_wrks ln_ppl ln_gdpc ln_exchange, absorb(year i.ctry#i.identifier#i.comcode i.low_tax_country#i.year) ///
vce(cluster ctry_year)

qui estout est* using "${hmrc_resultdir}\regression_table7.xls", ///
cells(b(star fmt(%9.3f)) se(par(`"("'`")"'))) ///
starlevels(* 0.10 ** 0.05 *** 0.01) stats(r2 r2_o r2_a N , ///
fmt(%9.3f %9.0g) labels(R-squared)) label append


estimates clear


*********Table 8: Effect of the Tax Differentials on Transfer Pricing by UK MNCs in Tax Havens*********************


use "${hmrc_datadir}\export_reg_final_ukmncs_fy_ntrans_newhavens",clear

xtset id year
global outcome lnp_weight

*controls
gen gdppc_usd=gdppc/exchange_rate
sum gdppc_usd
gen lgdppc=ln(gdppc_usd)
gen related_lnGDP=related*lgdppc

estimates clear

tab tax_haven
tab Hines_havens
tab OECD_havens

gen r_lowwedge_haven=r_lowwedge*tax_haven
gen r_highwedge_haven=r_highwedge*tax_haven

gen r_lowwedge_haven1=r_lowwedge*Hines_haven
gen r_highwedge_haven1=r_highwedge*Hines_haven

gen r_lowwedge_haven2=r_lowwedge*OECD_haven
gen r_highwedge_haven2=r_highwedge*OECD_haven

est clear
* using original tax_haven indicator
eststo:reghdfe ${outcome} r_lowwedge r_highwedge related_lnGDP if tax_haven==1 ///
, absorb(i.identifier#i.comcode#i.ctry i.year#i.identifier#i.comcode i.year#i.ctry#i.comcode)  vce(cluster ctry_year)

eststo:reghdfe ${outcome} r_lowwedge r_highwedge related_lnGDP if tax_haven==0 ///
, absorb(i.identifier#i.comcode#i.ctry i.year#i.identifier#i.comcode i.year#i.ctry#i.comcode)  vce(cluster ctry_year)

eststo: reghdfe ${outcome} r_lowwedge r_highwedge ///
r_lowwedge_haven r_highwedge_haven related_lnGDP ///
, absorb (i.identifier#i.comcode#i.ctry ///
i.year#i.identifier#i.comcode i.year#i.ctry#i.comcode) vce(cluster ctry_year)

*using Hines 2005 tax_haven indicator
eststo:reghdfe ${outcome} r_lowwedge r_highwedge related_lnGDP if Hines_haven==1 ///
, absorb(i.identifier#i.comcode#i.ctry i.year#i.identifier#i.comcode i.year#i.ctry#i.comcode)  vce(cluster ctry_year)

eststo:reghdfe ${outcome} r_lowwedge r_highwedge related_lnGDP if Hines_haven==0 ///
, absorb(i.identifier#i.comcode#i.ctry i.year#i.identifier#i.comcode i.year#i.ctry#i.comcode)  vce(cluster ctry_year)

eststo: reghdfe ${outcome} r_lowwedge r_highwedge ///
r_lowwedge_haven1 r_highwedge_haven1 related_lnGDP ///
, absorb (i.identifier#i.comcode#i.ctry ///
i.year#i.identifier#i.comcode i.year#i.ctry#i.comcode) vce(cluster ctry_year)

* using OECD tax_haven indicator
eststo:reghdfe ${outcome} r_lowwedge r_highwedge related_lnGDP if OECD_haven==1 ///
, absorb(i.identifier#i.comcode#i.ctry i.year#i.identifier#i.comcode i.year#i.ctry#i.comcode)  vce(cluster ctry_year)

eststo:reghdfe ${outcome} r_lowwedge r_highwedge related_lnGDP if OECD_haven==0 ///
, absorb(i.identifier#i.comcode#i.ctry i.year#i.identifier#i.comcode i.year#i.ctry#i.comcode)  vce(cluster ctry_year)

eststo: reghdfe ${outcome} r_lowwedge r_highwedge ///
r_lowwedge_haven2 r_highwedge_haven2 related_lnGDP ///
, absorb (i.identifier#i.comcode#i.ctry ///
i.year#i.identifier#i.comcode i.year#i.ctry#i.comcode) vce(cluster ctry_year)

*check using Davies et al tax haven countries
gen haven_sample = (alpha3code=="BHS")|(alpha3code=="BMU")|(alpha3code=="CYM")| ///
(alpha3code=="CYP")|(alpha3code=="HKG")|(alpha3code=="IRL")|(alpha3code=="LUX")| ///
(alpha3code=="MLT")|(alpha3code=="SGP")|(alpha3code=="CHE")

eststo: reghdfe ${outcome} r_lowwedge r_highwedge related_lnGDP if haven_sample==1 ///
, absorb(i.identifier#i.comcode#i.ctry i.year#i.identifier#i.comcode i.year#i.ctry#i.comcode)  vce(cluster ctry_year)


qui estout est* using "${hmrc_resultdir}\regression_table8.xls", ///
cells(b(star fmt(%9.3f)) se(par(`"("'`")"'))) ///
starlevels(* 0.10 ** 0.05 *** 0.01) stats(r2 r2_a N , ///
fmt(%9.3f %9.0g) labels(R-squared))  label replace 

estimates clear


*****************Table 9: Heterogeneous Transfer Mispricing in R&D *********************************

set more off
estimates clear

qui use "S:\Projects\Oxford - Transfer Pricing\Data\export_reg_final_ukmncs_fy_rd", clear
drop  differentiated_con differentiated_lib
merge m:1 comcode using "${hmrc_datadir}\Trade\diff" /*renewed SITC.Rev3 matched with SITC Rev2*/
drop if _merge==2
drop _merge

xtset id year

*controls
gen gdppc_usd=gdppc/exchange_rate
sum gdppc_usd
gen lgdppc=ln(gdppc_usd)
gen related_lnGDP=related*lgdppc

gen r_lowwedge_diff=r_lowwedge*diff_con
gen r_highwedge_diff=r_highwedge*diff_con

gen r_lowwedge_diff2=r_lowwedge*diff_lib
gen r_highwedge_diff2=r_highwedge*diff_lib


global outcome lnp_weight

*column 1: By R&D Intensity
global q=3
xtile rd_d=rd_firm, n($q)
qui tab rd_d, gen (rd_d)

forv i=1/$q {
qui gen low_wedge_rd`i'=I_low_wedge*rd_d`i'
qui gen r_lowwedge_rd`i'=r_lowwedge*rd_d`i'
}

eststo: reghdfe ${outcome} ///
low_wedge_rd* r_lowwedge_rd* r_highwedge related_lnGDP, ///
absorb(i.identifier#i.comcode#i.ctry i.year#i.identifier#i.comcode i.year#i.ctry#i.comcode)  vce(cluster ctry_year)

*column 2: By R&D Intensity Controlling for Firm Size 

eststo: reghdfe ${outcome} ///
low_wedge_rd* r_lowwedge_rd* low_wedge_kd* r_lowwedge_kd2 r_lowwedge_kd3 r_highwedge related_lnGDP, ///
absorb(i.identifier#i.comcode#i.ctry i.year#i.identifier#i.comcode i.year#i.ctry#i.comcode)  vce(cluster ctry_year)


*column 3: By R&D Intensity controlling for Differential Products

eststo: reghdfe ${outcome} r_lowwedge r_highwedge ///
r_lowwedge_diff2 low_wedge_rd* r_lowwedge_rd2 r_lowwedge_rd3 related_lnGDP, ///
absorb(i.identifier#i.comcode#i.ctry ///
i.year#i.identifier#i.comcode i.year#i.ctry#i.comcode)  vce(cluster ctry_year)

*column 4: By R&D Intensity controlling both for Differential Products and Firm size 
 
eststo: reghdfe ${outcome} r_lowwedge r_highwedge ///
r_lowwedge_diff2 low_wedge_rd* r_lowwedge_rd2 r_lowwedge_rd3 ///
low_wedge_kd* r_lowwedge_kd2 r_lowwedge_kd3 related_lnGDP, absorb(i.identifier#i.comcode#i.ctry ///
i.year#i.identifier#i.comcode i.year#i.ctry#i.comcode)  vce(cluster ctry_year)

qui estout est* using "${hmrc_resultdir}\regression_table9.xls", ///
cells(b(star fmt(%9.3f)) se(par(`"("'`")"'))) ///
starlevels(* 0.10 ** 0.05 *** 0.01) stats(r2 r2_a N , ///
fmt(%9.3f %9.0g) labels(R-squared)) label replace 
